Load Libraries

#NLP Libraries
library(rJava)
library(openNLP)
library(NLP)


#Tidy data manipulation
library(stringr)
library(dplyr)
library(tidyr)
library(tidytext)
library(readr)
library(stringi)
library(textclean)

#Corpus ingest
library(gutenbergr)

#Helper library
library(sqldf)
## Warning in doTryCatch(return(expr), name, parentenv, handler): unable to load shared object '/Library/Frameworks/R.framework/Resources/modules//R_X11.so':
##   dlopen(/Library/Frameworks/R.framework/Resources/modules//R_X11.so, 6): Library not loaded: /opt/X11/lib/libSM.6.dylib
##   Referenced from: /Library/Frameworks/R.framework/Versions/4.1/Resources/modules/R_X11.so
##   Reason: image not found
#Graphics library
library(ggiraphExtra)
library(ggplot2)
library(RColorBrewer)
library(scales)
# Read the entity tables from CSV files in the working directory.
gaskell_entities <- read.csv(file = "entities_clean_gaskell.csv")
eliot_entities <- read.csv(file = "entities_clean_eliot.csv")
eliot_entities_cleaner <- read.csv(file = "entities_cleaner_eliot.csv")

Unnesting sentences and drop stopwords

We can now unnest the sentences to get a view of the words that “surround” each entity. We then build a separate table of sentiment counts per sentence, join those sentiment columns back onto the original entities, and drop the repeated rows. We do this for both authors.

# Split each entity's sentence into one word per row and drop stop words.
entities_gaskell_unnest <- gaskell_entities %>%
  unnest_tokens(word, sentence) %>%
  anti_join(stop_words, by = "word")

# Count NRC category words per sentence and widen to one column per category.
# Columns are sorted by name because later steps select ranges such as
# `anger:trust`, which rely on alphabetical column order.
# NOTE(review): if the input has one row per entity per sentence, a sentence
# with several entities has its words counted once per entity -- confirm that
# this weighting is intended.
entities_gaskell_sentiment <- entities_gaskell_unnest %>%
  group_by(author, title) %>%
  inner_join(get_sentiments("nrc"), by = "word") %>%
  count(sentence_nr, sentiment) %>%
  pivot_wider(
    names_from = sentiment,
    values_from = n,
    values_fill = 0,
    names_sort = TRUE
  ) %>%
  mutate(sentiment = positive - negative)

# Attach the per-sentence scores to every entity row, then drop the `word`
# column and collapse the duplicates it leaves behind.
entities_matches_sentiment_gaskell <- entities_gaskell_unnest %>%
  inner_join(
    entities_gaskell_sentiment,
    by = c("author", "title", "sentence_nr")
  ) %>%
  select(-word) %>%
  distinct()

# Per entity: mean net sentiment (`total`) and number of rows (`appearance`).
# The result stays grouped by author, title and words, as before.
ner_total_sentiment_gaskell <- entities_matches_sentiment_gaskell %>%
  group_by(author, title, words, kind) %>%
  summarise(
    total = mean(sentiment),
    appearance = n(),
    .groups = "drop_last"
  )
# Split each entity's sentence into one word per row and drop stop words.
entities_eliot_unnest <- eliot_entities %>%
  unnest_tokens(word, sentence) %>%
  anti_join(stop_words, by = "word")

# Count NRC category words per sentence and widen to one column per category.
# Columns are sorted by name because later steps select ranges such as
# `anger:trust`, which rely on alphabetical column order.
# NOTE(review): if the input has one row per entity per sentence, a sentence
# with several entities has its words counted once per entity -- confirm that
# this weighting is intended.
entities_eliot_sentiment <- entities_eliot_unnest %>%
  group_by(author, title) %>%
  inner_join(get_sentiments("nrc"), by = "word") %>%
  count(sentence_nr, sentiment) %>%
  pivot_wider(
    names_from = sentiment,
    values_from = n,
    values_fill = 0,
    names_sort = TRUE
  ) %>%
  mutate(sentiment = positive - negative)

# Attach the per-sentence scores to every entity row, then drop the `word`
# column and collapse the duplicates it leaves behind.
entities_matches_sentiment_eliot <- entities_eliot_unnest %>%
  inner_join(
    entities_eliot_sentiment,
    by = c("author", "title", "sentence_nr")
  ) %>%
  select(-word) %>%
  distinct()

# Per entity: mean net sentiment (`total`) and number of rows (`appearance`).
# The result stays grouped by author, title and words, as before.
ner_total_sentiment_eliot <- entities_matches_sentiment_eliot %>%
  group_by(author, title, words, kind) %>%
  summarise(
    total = mean(sentiment),
    appearance = n(),
    .groups = "drop_last"
  )

This is for the cleaner version of Middlemarch with Mr Malice, etc

# Same pipeline as for the other entity tables, applied to the cleaned Eliot
# entities.

# Split each entity's sentence into one word per row and drop stop words.
entities_eliot_unnest_cleaner <- eliot_entities_cleaner %>%
  unnest_tokens(word, sentence) %>%
  anti_join(stop_words, by = "word")

# Count NRC category words per sentence and widen to one column per category.
# Columns are sorted by name because later steps select ranges such as
# `anger:sentiment`, which rely on alphabetical column order.
entities_eliot_sentiment_cleaner <- entities_eliot_unnest_cleaner %>%
  group_by(author, title) %>%
  inner_join(get_sentiments("nrc"), by = "word") %>%
  count(sentence_nr, sentiment) %>%
  pivot_wider(
    names_from = sentiment,
    values_from = n,
    values_fill = 0,
    names_sort = TRUE
  ) %>%
  mutate(sentiment = positive - negative)

# Attach the per-sentence scores to every entity row, then drop the `word`
# column and collapse the duplicates it leaves behind.
entities_matches_sentiment_eliot_cleaner <- entities_eliot_unnest_cleaner %>%
  inner_join(
    entities_eliot_sentiment_cleaner,
    by = c("author", "title", "sentence_nr")
  ) %>%
  select(-word) %>%
  distinct()

# Per entity: mean net sentiment (`total`) and number of rows (`appearance`).
# The result stays grouped by author, title and words, as before.
ner_total_sentiment_eliot_cleaner <- entities_matches_sentiment_eliot_cleaner %>%
  group_by(author, title, words, kind) %>%
  summarise(
    total = mean(sentiment),
    appearance = n(),
    .groups = "drop_last"
  )

# Middlemarch-only slices of the cleaned tables.
eliot_middlemarch_cleaner <- ner_total_sentiment_eliot_cleaner %>%
  filter(title == "Middlemarch")

entities_matches_sentiment_middlemarch_cleaner <-
  entities_matches_sentiment_eliot_cleaner %>%
  filter(title == "Middlemarch")
# Per-novel slices of the entity-level summary (mean sentiment, appearances).
eliot_middlemarch <- filter(ner_total_sentiment_eliot, title == "Middlemarch")
eliot_adam_bede <- filter(ner_total_sentiment_eliot, title == "Adam Bede")
eliot_silas_marner <- filter(
  ner_total_sentiment_eliot,
  title == "Silas Marner"
)
eliot_daniel_deronda <- filter(
  ner_total_sentiment_eliot,
  title == "Daniel Deronda"
)
eliot_the_mill_on_the_floss <- filter(
  ner_total_sentiment_eliot,
  title == "The Mill on the Floss"
)
eliot_romola <- filter(ner_total_sentiment_eliot, title == "Romola")
eliot_felix_holt <- filter(
  ner_total_sentiment_eliot,
  title == "Felix Holt, the Radical"
)

# Per-novel slices of the sentence-level entity/sentiment matches.
entities_matches_sentiment_middlemarch <- filter(
  entities_matches_sentiment_eliot,
  title == "Middlemarch"
)
entities_matches_sentiment_adam_bede <- filter(
  entities_matches_sentiment_eliot,
  title == "Adam Bede"
)
entities_matches_sentiment_silas_marner <- filter(
  entities_matches_sentiment_eliot,
  title == "Silas Marner"
)
entities_matches_sentiment_daniel_deronda <- filter(
  entities_matches_sentiment_eliot,
  title == "Daniel Deronda"
)
entities_matches_sentiment_the_mill_on_the_floss <- filter(
  entities_matches_sentiment_eliot,
  title == "The Mill on the Floss"
)
entities_matches_sentiment_romola <- filter(
  entities_matches_sentiment_eliot,
  title == "Romola"
)
entities_matches_sentiment_felix_holt <- filter(
  entities_matches_sentiment_eliot,
  title == "Felix Holt, the Radical"
)

Part 6: Visualization

Now that we have the data we can actually visualize it.

Chart of the top 10

We can chart the top 10 characters based on their positive sentiment. Note that the characters may or may not be positive. This is merely describing their context.

# Horizontal bar chart of the person entities with the highest `total`
# (mean net sentiment), one facet per title.
#
# entity_totals: table with columns title, kind, words and total.
# n:             number of entities to keep per title (ties are all kept).
# Returns a ggplot object.
plot_most_positive_people <- function(entity_totals, n = 10) {
  entity_totals %>%
    group_by(title) %>%
    filter(kind == "person") %>%
    # slice_max() replaces the superseded top_n(); same rows, ties included.
    slice_max(total, n = n) %>%
    mutate(words = reorder(words, total)) %>%
    ggplot(aes(words, y = total, fill = title)) +
    geom_col() +
    facet_wrap(~ title, scales = "free") +
    coord_flip()
}

# Each call is a top-level expression, so the returned plot is printed.
plot_most_positive_people(eliot_middlemarch)

plot_most_positive_people(eliot_adam_bede)

plot_most_positive_people(eliot_silas_marner)

plot_most_positive_people(eliot_daniel_deronda)

plot_most_positive_people(eliot_the_mill_on_the_floss)

plot_most_positive_people(eliot_romola)

plot_most_positive_people(eliot_felix_holt)

Chart bottom 10

We can also chart the bottom ten by selecting the ten lowest values of total instead of the ten highest (for example, by passing a negative n to top_n()).

# Horizontal bar chart of the person entities with the lowest `total`
# (mean net sentiment), one facet per title.
#
# entity_totals: table with columns title, kind, words and total.
# n:             number of entities to keep per title (ties are all kept).
# Returns a ggplot object.
plot_most_negative_people <- function(entity_totals, n = 10) {
  entity_totals %>%
    group_by(title) %>%
    filter(kind == "person") %>%
    # slice_min() replaces top_n() with a negative n; same rows, ties included.
    slice_min(total, n = n) %>%
    mutate(words = reorder(words, total)) %>%
    ggplot(aes(words, y = total, fill = title)) +
    geom_col() +
    facet_wrap(~ title, scales = "free") +
    coord_flip()
}

# Each call is a top-level expression, so the returned plot is printed.
plot_most_negative_people(eliot_middlemarch)

plot_most_negative_people(eliot_adam_bede)

plot_most_negative_people(eliot_silas_marner)

plot_most_negative_people(eliot_daniel_deronda)

plot_most_negative_people(eliot_the_mill_on_the_floss)

plot_most_negative_people(eliot_romola)

plot_most_negative_people(eliot_felix_holt)

We can see that the graph in Middlemarch is skewed because of the naming of characters - insert explanation here probably. For this reason, we removed these characters to hopefully show us a more accurate representation of the negative characters.

# Bar chart of the ten person entities with the lowest mean net sentiment
# (`total`) in the cleaned Middlemarch table.
eliot_middlemarch_cleaner %>%
  group_by(title) %>%
  filter(kind == "person") %>%
  # slice_min() replaces top_n() with a negative n; same rows, ties included.
  slice_min(total, n = 10) %>%
  mutate(words = reorder(words, total)) %>%
  ggplot(aes(words, y = total, fill = title)) +
  geom_col() +
  facet_wrap(~ title, scales = "free") +
  coord_flip()

Chart of top characters by appearance

The above sentiment tends to favor the more minor characters, who appear less often and therefore their emotion is evened out less. We can also figure out the emotions of the main characters.

# Horizontal bar chart of the person entities with the most appearances.
# Bars are ordered by `appearance`; bar length is `total` (mean net
# sentiment). One facet per title.
#
# entity_totals: table with columns author, title, kind, words, total and
#                appearance.
# n:             number of entities to keep per author/title (ties are kept).
# Returns a ggplot object.
plot_most_frequent_people <- function(entity_totals, n = 10) {
  entity_totals %>%
    group_by(author, title) %>%
    filter(kind == "person") %>%
    # slice_max() replaces the superseded top_n(); same rows, ties included.
    slice_max(appearance, n = n) %>%
    mutate(words = reorder(words, appearance)) %>%
    ggplot(aes(words, y = total, fill = title)) +
    geom_col() +
    facet_wrap(~ title, scales = "free") +
    coord_flip()
}

# Each call is a top-level expression, so the returned plot is printed.
plot_most_frequent_people(eliot_middlemarch)

plot_most_frequent_people(eliot_adam_bede)

plot_most_frequent_people(eliot_silas_marner)

plot_most_frequent_people(eliot_daniel_deronda)

plot_most_frequent_people(eliot_the_mill_on_the_floss)

plot_most_frequent_people(eliot_romola)

plot_most_frequent_people(eliot_felix_holt)

Individual sentiments

The variable entities_matches_sentiment also houses the other NRC emotions. We can put these individual emotions on a radar plot.

Note that this table still contains the values positive, negative, and sentiment. It is useful to keep these around for a bit, just so we can sort out what we want to look at.

Ranking by “total” emotion

There is no way to intelligibly map over 100 entities at once, so you want to break them down in some meaningful way. One way to look at it is by raw character prominence: which characters are surrounded by the most emotion words? The code below does just that.

  # Build radar-plot data: for the `n` person entities with the most emotion
  # words, the percentage share of each emotion column (anger ... trust).
  #
  # entity_sentences: table with columns title, words, kind, the emotion
  #                   columns anger:trust, and positive/negative/sentiment.
  # n:                number of entities to keep (change to include more or
  #                   fewer entities).
  # Returns a table grouped by title and words, one row per entity.
  radar_emotion_shares <- function(entity_sentences, n = 10) {
    entity_sentences %>%
      select(-positive, -negative, -sentiment) %>% # keep only the emotions
      filter(kind == "person") %>%
      group_by(title, words, kind) %>%
      summarise(across(anger:trust, sum), .groups = "drop_last") %>%
      # Sum the emotion columns explicitly rather than every numeric column.
      mutate(total = rowSums(across(anger:trust))) %>%
      arrange(desc(total)) %>%
      head(n) %>%
      mutate(across(anger:trust, .fns = ~ round((. / total) * 100))) %>%
      select(-total, -kind)
  }

  # Interactive radar chart with one facet per entity, coloured by title.
  draw_emotion_radar <- function(radar_data) {
    ggRadar(
      data = radar_data,
      mapping = aes(color = title, facet = words),
      rescale = FALSE,
      interactive = TRUE,
      use.label = TRUE,
      size = 2,
      legend.position = "right"
    )
  }

  # `radar_facet` is reassigned for each novel; each draw call is a top-level
  # expression, so the chart is printed.
  radar_facet <- radar_emotion_shares(entities_matches_sentiment_middlemarch)
  draw_emotion_radar(radar_facet)

  radar_facet <- radar_emotion_shares(entities_matches_sentiment_adam_bede)
  draw_emotion_radar(radar_facet)

  radar_facet <- radar_emotion_shares(entities_matches_sentiment_silas_marner)
  draw_emotion_radar(radar_facet)

  radar_facet <- radar_emotion_shares(entities_matches_sentiment_daniel_deronda)
  draw_emotion_radar(radar_facet)

  radar_facet <- radar_emotion_shares(
    entities_matches_sentiment_the_mill_on_the_floss
  )
  draw_emotion_radar(radar_facet)

  radar_facet <- radar_emotion_shares(entities_matches_sentiment_romola)
  draw_emotion_radar(radar_facet)

  radar_facet <- radar_emotion_shares(entities_matches_sentiment_felix_holt)
  draw_emotion_radar(radar_facet)

Radar plot by the highest positive or negative emotion

We can also map the emotions by looking at the largest positive and negative sentiment. To switch from positive to negative, simply take the tail() of the table instead of the head(). This means that instead of grabbing the top of the table we are grabbing the bottom. Since the table is sorted in descending order of sentiment, we’ll get the lowest values.

This is the positive sentiment:

# Build radar-plot data: summed emotion counts for the `n` person entities
# with the highest summed net sentiment.
#
# entity_sentences: table with columns title, words, kind and the columns
#                   anger:sentiment (which include positive and negative).
# n:                number of entities to keep (change to include more or
#                   fewer entities).
# Returns a table grouped by title and words, one row per entity.
radar_most_positive <- function(entity_sentences, n = 10) {
  entity_sentences %>%
    # Change "person" to chart a different entity kind (e.g. locations).
    filter(kind == "person") %>%
    group_by(title, words, kind) %>%
    summarise(across(anger:sentiment, sum), .groups = "drop_last") %>%
    arrange(desc(sentiment)) %>%
    head(n) %>%
    select(-positive, -negative, -sentiment, -kind)
}

# Interactive radar chart with one facet per entity, coloured by title.
draw_positive_radar <- function(radar_data) {
  ggRadar(
    data = radar_data,
    mapping = aes(color = title, facet = words),
    rescale = FALSE,
    interactive = TRUE,
    use.label = TRUE,
    size = 2,
    legend.position = "right"
  )
}

# `radar_facet_sentiment` is reassigned for each novel; each draw call is a
# top-level expression, so the chart is printed.
radar_facet_sentiment <- radar_most_positive(
  entities_matches_sentiment_middlemarch
)
draw_positive_radar(radar_facet_sentiment)

radar_facet_sentiment <- radar_most_positive(
  entities_matches_sentiment_adam_bede
)
draw_positive_radar(radar_facet_sentiment)

radar_facet_sentiment <- radar_most_positive(
  entities_matches_sentiment_silas_marner
)
draw_positive_radar(radar_facet_sentiment)

radar_facet_sentiment <- radar_most_positive(
  entities_matches_sentiment_daniel_deronda
)
draw_positive_radar(radar_facet_sentiment)

radar_facet_sentiment <- radar_most_positive(
  entities_matches_sentiment_the_mill_on_the_floss
)
draw_positive_radar(radar_facet_sentiment)

radar_facet_sentiment <- radar_most_positive(
  entities_matches_sentiment_romola
)
draw_positive_radar(radar_facet_sentiment)

radar_facet_sentiment <- radar_most_positive(
  entities_matches_sentiment_felix_holt
)
draw_positive_radar(radar_facet_sentiment)

This is the negative sentiment:

# Build radar-plot data: summed emotion counts for the `n` person entities
# with the lowest summed net sentiment.
#
# entity_sentences: table with columns title, words, kind and the columns
#                   anger:sentiment (which include positive and negative).
# n:                number of entities to keep (change to include more or
#                   fewer entities).
# Returns a table grouped by title and words, one row per entity.
radar_most_negative <- function(entity_sentences, n = 10) {
  entity_sentences %>%
    # Change "person" to chart a different entity kind (e.g. locations).
    filter(kind == "person") %>%
    group_by(title, words, kind) %>%
    summarise(across(anger:sentiment, sum), .groups = "drop_last") %>%
    # Sorted descending, so the last `n` rows are the lowest scores.
    arrange(desc(sentiment)) %>%
    tail(n) %>%
    select(-positive, -negative, -sentiment, -kind)
}

# Interactive radar chart with one facet per entity, coloured by title.
draw_negative_radar <- function(radar_data) {
  ggRadar(
    data = radar_data,
    mapping = aes(color = title, facet = words),
    rescale = FALSE,
    interactive = TRUE,
    use.label = TRUE,
    size = 1,
    legend.position = "right"
  )
}

# `radar_facet_sentiment` is reassigned for each novel; each draw call is a
# top-level expression, so the chart is printed.
radar_facet_sentiment <- radar_most_negative(
  entities_matches_sentiment_middlemarch
)
draw_negative_radar(radar_facet_sentiment)

radar_facet_sentiment <- radar_most_negative(
  entities_matches_sentiment_adam_bede
)
draw_negative_radar(radar_facet_sentiment)

radar_facet_sentiment <- radar_most_negative(
  entities_matches_sentiment_silas_marner
)
draw_negative_radar(radar_facet_sentiment)

radar_facet_sentiment <- radar_most_negative(
  entities_matches_sentiment_daniel_deronda
)
draw_negative_radar(radar_facet_sentiment)

radar_facet_sentiment <- radar_most_negative(
  entities_matches_sentiment_the_mill_on_the_floss
)
draw_negative_radar(radar_facet_sentiment)

radar_facet_sentiment <- radar_most_negative(
  entities_matches_sentiment_romola
)
draw_negative_radar(radar_facet_sentiment)

radar_facet_sentiment <- radar_most_negative(
  entities_matches_sentiment_felix_holt
)
draw_negative_radar(radar_facet_sentiment)

Here again the graphs are skewed for Middlemarch as a result of the character names themselves, so here’s a representation with those names removed.

# Radar data for the cleaned Middlemarch table: summed emotion counts for the
# ten person entities with the lowest summed net sentiment.
radar_facet_sentiment <- entities_matches_sentiment_middlemarch_cleaner %>%
  # Change "person" to chart a different entity kind (e.g. locations).
  filter(kind == "person") %>%
  group_by(title, words, kind) %>%
  # Explicit .groups keeps the title/words grouping without the message.
  summarise(across(anger:sentiment, sum), .groups = "drop_last") %>%
  # Sorted descending, so the last rows are the lowest scores.
  arrange(desc(sentiment)) %>%
  tail(10) %>% # Change number to include more or fewer entities
  select(-positive, -negative, -sentiment, -kind)

# Interactive radar chart with one facet per entity, coloured by title.
ggRadar(
  data = radar_facet_sentiment,
  mapping = aes(color = title, facet = words),
  rescale = FALSE,
  interactive = TRUE,
  use.label = TRUE,
  size = 1,
  legend.position = "right"
)

Now for Gaskell:

# Per-novel slices of the entity-level summary (mean sentiment, appearances).
gaskell_mary_barton <- filter(
  ner_total_sentiment_gaskell,
  title == "Mary Barton"
)
gaskell_my_lady_ludlow <- filter(
  ner_total_sentiment_gaskell,
  title == "My Lady Ludlow"
)
gaskell_a_dark_night_work <- filter(
  ner_total_sentiment_gaskell,
  title == "A Dark Night's Work"
)
gaskell_ruth <- filter(ner_total_sentiment_gaskell, title == "Ruth")

# Per-novel slices of the sentence-level entity/sentiment matches.
entities_matches_sentiment_mary_barton <- filter(
  entities_matches_sentiment_gaskell,
  title == "Mary Barton"
)
entities_matches_sentiment_my_lady_ludlow <- filter(
  entities_matches_sentiment_gaskell,
  title == "My Lady Ludlow"
)
entities_matches_sentiment_a_dark_night_work <- filter(
  entities_matches_sentiment_gaskell,
  title == "A Dark Night's Work"
)
# NOTE(review): capital "R" differs from the other snake_case names; the name
# is kept because later code refers to it.
entities_matches_sentiment_Ruth <- filter(
  entities_matches_sentiment_gaskell,
  title == "Ruth"
)

top 10 characters based on their positive sentiment

# Horizontal bar chart of the person entities with the highest `total`
# (mean net sentiment), one facet per title.
#
# entity_totals: table with columns title, kind, words and total.
# n:             number of entities to keep per title (ties are all kept).
# Returns a ggplot object.
plot_most_positive_people <- function(entity_totals, n = 10) {
  entity_totals %>%
    group_by(title) %>%
    filter(kind == "person") %>%
    # slice_max() replaces the superseded top_n(); same rows, ties included.
    slice_max(total, n = n) %>%
    mutate(words = reorder(words, total)) %>%
    ggplot(aes(words, y = total, fill = title)) +
    geom_col() +
    facet_wrap(~ title, scales = "free") +
    coord_flip()
}

# Each call is a top-level expression, so the returned plot is printed.
plot_most_positive_people(gaskell_mary_barton)

plot_most_positive_people(gaskell_my_lady_ludlow)

plot_most_positive_people(gaskell_a_dark_night_work)

plot_most_positive_people(gaskell_ruth)

Bottom 10 characters, i.e. those with the most negative sentiment

# Horizontal bar chart of the person entities with the lowest `total`
# (mean net sentiment), one facet per title.
#
# entity_totals: table with columns title, kind, words and total.
# n:             number of entities to keep per title (ties are all kept).
# Returns a ggplot object.
plot_most_negative_people <- function(entity_totals, n = 10) {
  entity_totals %>%
    group_by(title) %>%
    filter(kind == "person") %>%
    # slice_min() replaces top_n() with a negative n; same rows, ties included.
    slice_min(total, n = n) %>%
    mutate(words = reorder(words, total)) %>%
    ggplot(aes(words, y = total, fill = title)) +
    geom_col() +
    facet_wrap(~ title, scales = "free") +
    coord_flip()
}

# Each call is a top-level expression, so the returned plot is printed.
plot_most_negative_people(gaskell_mary_barton)

plot_most_negative_people(gaskell_my_lady_ludlow)

plot_most_negative_people(gaskell_a_dark_night_work)

plot_most_negative_people(gaskell_ruth)

Emotions of the main characters (ie the ones with the most “appearances”):

# Horizontal bar chart of the person entities with the most appearances.
# Bars are ordered by `appearance`; bar length is `total` (mean net
# sentiment). One facet per title.
#
# entity_totals: table with columns author, title, kind, words, total and
#                appearance.
# n:             number of entities to keep per author/title (ties are kept).
# Returns a ggplot object.
plot_most_frequent_people <- function(entity_totals, n = 10) {
  entity_totals %>%
    group_by(author, title) %>%
    filter(kind == "person") %>%
    # slice_max() replaces the superseded top_n(); same rows, ties included.
    slice_max(appearance, n = n) %>%
    mutate(words = reorder(words, appearance)) %>%
    ggplot(aes(words, y = total, fill = title)) +
    geom_col() +
    facet_wrap(~ title, scales = "free") +
    coord_flip()
}

# Each call is a top-level expression, so the returned plot is printed.
plot_most_frequent_people(gaskell_mary_barton)

plot_most_frequent_people(gaskell_my_lady_ludlow)

plot_most_frequent_people(gaskell_a_dark_night_work)

plot_most_frequent_people(gaskell_ruth)

Ranking by “total” emotion

That is, which characters are surrounded by the most emotion words?

  radar_facet <- entities_matches_sentiment_mary_barton %>%
  select(-positive, -negative, -sentiment) %>% #drop out the unnecessary columns
  filter(kind == "person") %>%
  group_by(title, words, kind) %>%
  summarise(across(anger:trust, sum)) %>%
  mutate(total = rowSums(across(where(is.numeric))))  %>%
  arrange(desc(total)) %>%
  head(10)  %>% #Change number to include more or fewer entities
  mutate(across(anger:trust, .fns = ~ round((. / total) * 100))) %>%
  select(-total,-kind)

  ggRadar(
  data = radar_facet,
  mapping = aes(color = title, facet = words),
  rescale = FALSE,
  interactive = TRUE,
  use.label = TRUE,
  size = 2,
  legend.position = "right"
)
  # Radar input built from `entities_matches_sentiment_my_lady_ludlow`: sum
  # the emotion columns (anger through trust) per person entity, keep the
  # ten rows with the largest combined total, and express each emotion as a
  # rounded percentage of that total.
  radar_facet <- entities_matches_sentiment_my_lady_ludlow %>%
    filter(kind == "person") %>%
    # positive/negative/sentiment are not needed for this view
    select(-c(positive, negative, sentiment)) %>%
    group_by(title, words, kind) %>%
    summarise(across(anger:trust, sum)) %>%
    mutate(total = rowSums(across(where(is.numeric)))) %>%
    arrange(desc(total)) %>%
    head(n = 10) %>% # Change number to include more or fewer entities
    mutate(across(anger:trust, .fns = ~ round(.x / total * 100))) %>%
    select(-c(total, kind))

  # One radar facet per entity, coloured by title.
  ggRadar(
    data = radar_facet,
    mapping = aes(color = title, facet = words),
    size = 2,
    rescale = FALSE,
    use.label = TRUE,
    interactive = TRUE,
    legend.position = "right"
  )
    # Radar input built from `entities_matches_sentiment_a_dark_night_work`:
    # sum the emotion columns (anger through trust) per person entity, keep
    # the ten rows with the largest combined total, and express each emotion
    # as a rounded percentage of that total.
    radar_facet <- entities_matches_sentiment_a_dark_night_work %>%
      filter(kind == "person") %>%
      # positive/negative/sentiment are not needed for this view
      select(-c(positive, negative, sentiment)) %>%
      group_by(title, words, kind) %>%
      summarise(across(anger:trust, sum)) %>%
      mutate(total = rowSums(across(where(is.numeric)))) %>%
      arrange(desc(total)) %>%
      head(n = 10) %>% # Change number to include more or fewer entities
      mutate(across(anger:trust, .fns = ~ round(.x / total * 100))) %>%
      select(-c(total, kind))

    # One radar facet per entity, coloured by title.
    ggRadar(
      data = radar_facet,
      mapping = aes(color = title, facet = words),
      size = 2,
      rescale = FALSE,
      use.label = TRUE,
      interactive = TRUE,
      legend.position = "right"
    )
    # Radar input built from `entities_matches_sentiment_Ruth`: sum the
    # emotion columns (anger through trust) per person entity, keep the ten
    # rows with the largest combined total, and express each emotion as a
    # rounded percentage of that total.
    radar_facet <- entities_matches_sentiment_Ruth %>%
      filter(kind == "person") %>%
      # positive/negative/sentiment are not needed for this view
      select(-c(positive, negative, sentiment)) %>%
      group_by(title, words, kind) %>%
      summarise(across(anger:trust, sum)) %>%
      mutate(total = rowSums(across(where(is.numeric)))) %>%
      arrange(desc(total)) %>%
      head(n = 10) %>% # Change number to include more or fewer entities
      mutate(across(anger:trust, .fns = ~ round(.x / total * 100))) %>%
      select(-c(total, kind))

    # One radar facet per entity, coloured by title.
    ggRadar(
      data = radar_facet,
      mapping = aes(color = title, facet = words),
      size = 2,
      rescale = FALSE,
      use.label = TRUE,
      interactive = TRUE,
      legend.position = "right"
    )

We can also map the emotions by looking at the largest positive and negative sentiment.

These are the ten characters with the highest net sentiment (positive minus negative) in each novel:

# Radar input built from `entities_matches_sentiment_mary_barton`: sum every
# column from anger through sentiment per person entity, then keep the ten
# rows with the highest summed `sentiment`. Only the emotion columns are
# passed on to the plot.
radar_facet_sentiment <- entities_matches_sentiment_mary_barton %>%
  # Change the `kind` filter to plot locations instead of people
  filter(kind == "person") %>%
  group_by(title, words, kind) %>%
  summarise(across(anger:sentiment, sum)) %>%
  arrange(desc(sentiment)) %>%
  head(n = 10) %>% # Change number to include more or fewer entities
  select(-c(positive, negative, sentiment, kind))

# One radar facet per entity, coloured by title.
ggRadar(
  data = radar_facet_sentiment,
  mapping = aes(color = title, facet = words),
  size = 2,
  rescale = FALSE,
  use.label = TRUE,
  interactive = TRUE,
  legend.position = "right"
)
# Radar input built from `entities_matches_sentiment_my_lady_ludlow`: sum
# every column from anger through sentiment per person entity, then keep the
# ten rows with the highest summed `sentiment`. Only the emotion columns are
# passed on to the plot.
radar_facet_sentiment <- entities_matches_sentiment_my_lady_ludlow %>%
  # Change the `kind` filter to plot locations instead of people
  filter(kind == "person") %>%
  group_by(title, words, kind) %>%
  summarise(across(anger:sentiment, sum)) %>%
  arrange(desc(sentiment)) %>%
  head(n = 10) %>% # Change number to include more or fewer entities
  select(-c(positive, negative, sentiment, kind))

# One radar facet per entity, coloured by title.
ggRadar(
  data = radar_facet_sentiment,
  mapping = aes(color = title, facet = words),
  size = 2,
  rescale = FALSE,
  use.label = TRUE,
  interactive = TRUE,
  legend.position = "right"
)
# Radar input built from `entities_matches_sentiment_a_dark_night_work`: sum
# every column from anger through sentiment per person entity, then keep the
# ten rows with the highest summed `sentiment`. Only the emotion columns are
# passed on to the plot.
radar_facet_sentiment <- entities_matches_sentiment_a_dark_night_work %>%
  # Change the `kind` filter to plot locations instead of people
  filter(kind == "person") %>%
  group_by(title, words, kind) %>%
  summarise(across(anger:sentiment, sum)) %>%
  arrange(desc(sentiment)) %>%
  head(n = 10) %>% # Change number to include more or fewer entities
  select(-c(positive, negative, sentiment, kind))

# One radar facet per entity, coloured by title.
ggRadar(
  data = radar_facet_sentiment,
  mapping = aes(color = title, facet = words),
  size = 2,
  rescale = FALSE,
  use.label = TRUE,
  interactive = TRUE,
  legend.position = "right"
)
# Radar input built from `entities_matches_sentiment_Ruth`: sum every column
# from anger through sentiment per person entity, then keep the ten rows
# with the highest summed `sentiment`. Only the emotion columns are passed
# on to the plot.
radar_facet_sentiment <- entities_matches_sentiment_Ruth %>%
  # Change the `kind` filter to plot locations instead of people
  filter(kind == "person") %>%
  group_by(title, words, kind) %>%
  summarise(across(anger:sentiment, sum)) %>%
  arrange(desc(sentiment)) %>%
  head(n = 10) %>% # Change number to include more or fewer entities
  select(-c(positive, negative, sentiment, kind))

# One radar facet per entity, coloured by title.
ggRadar(
  data = radar_facet_sentiment,
  mapping = aes(color = title, facet = words),
  size = 2,
  rescale = FALSE,
  use.label = TRUE,
  interactive = TRUE,
  legend.position = "right"
)

These are the ten characters with the lowest net sentiment (positive minus negative) in each novel:

# Radar input built from `entities_matches_sentiment_mary_barton`: sum every
# column from anger through sentiment per person entity and sort by summed
# `sentiment`, highest first. The last ten rows are therefore the entities
# with the lowest values. Only the emotion columns are passed on to the plot.
radar_facet_sentiment <- entities_matches_sentiment_mary_barton %>%
  # Change the `kind` filter to plot locations instead of people
  filter(kind == "person") %>%
  group_by(title, words, kind) %>%
  summarise(across(anger:sentiment, sum)) %>%
  arrange(desc(sentiment)) %>%
  tail(n = 10) %>% # Change number to include more or fewer entities
  select(-c(positive, negative, sentiment, kind))

# One radar facet per entity, coloured by title.
ggRadar(
  data = radar_facet_sentiment,
  mapping = aes(color = title, facet = words),
  size = 2,
  rescale = FALSE,
  use.label = TRUE,
  interactive = TRUE,
  legend.position = "right"
)
# Radar input built from `entities_matches_sentiment_my_lady_ludlow`: sum
# every column from anger through sentiment per person entity and sort by
# summed `sentiment`, highest first. The last ten rows are therefore the
# entities with the lowest values. Only the emotion columns are passed on to
# the plot.
radar_facet_sentiment <- entities_matches_sentiment_my_lady_ludlow %>%
  # Change the `kind` filter to plot locations instead of people
  filter(kind == "person") %>%
  group_by(title, words, kind) %>%
  summarise(across(anger:sentiment, sum)) %>%
  arrange(desc(sentiment)) %>%
  tail(n = 10) %>% # Change number to include more or fewer entities
  select(-c(positive, negative, sentiment, kind))

# One radar facet per entity, coloured by title.
ggRadar(
  data = radar_facet_sentiment,
  mapping = aes(color = title, facet = words),
  size = 2,
  rescale = FALSE,
  use.label = TRUE,
  interactive = TRUE,
  legend.position = "right"
)
# Radar input built from `entities_matches_sentiment_a_dark_night_work`: sum
# every column from anger through sentiment per person entity and sort by
# summed `sentiment`, highest first. The last ten rows are therefore the
# entities with the lowest values. Only the emotion columns are passed on to
# the plot.
radar_facet_sentiment <- entities_matches_sentiment_a_dark_night_work %>%
  # Change the `kind` filter to plot locations instead of people
  filter(kind == "person") %>%
  group_by(title, words, kind) %>%
  summarise(across(anger:sentiment, sum)) %>%
  arrange(desc(sentiment)) %>%
  tail(n = 10) %>% # Change number to include more or fewer entities
  select(-c(positive, negative, sentiment, kind))

# One radar facet per entity, coloured by title.
ggRadar(
  data = radar_facet_sentiment,
  mapping = aes(color = title, facet = words),
  size = 2,
  rescale = FALSE,
  use.label = TRUE,
  interactive = TRUE,
  legend.position = "right"
)
# Radar input built from `entities_matches_sentiment_Ruth`: sum every column
# from anger through sentiment per person entity and sort by summed
# `sentiment`, highest first. The last ten rows are therefore the entities
# with the lowest values. Only the emotion columns are passed on to the plot.
radar_facet_sentiment <- entities_matches_sentiment_Ruth %>%
  # Change the `kind` filter to plot locations instead of people
  filter(kind == "person") %>%
  group_by(title, words, kind) %>%
  summarise(across(anger:sentiment, sum)) %>%
  arrange(desc(sentiment)) %>%
  tail(n = 10) %>% # Change number to include more or fewer entities
  select(-c(positive, negative, sentiment, kind))

# One radar facet per entity, coloured by title.
ggRadar(
  data = radar_facet_sentiment,
  mapping = aes(color = title, facet = words),
  size = 2,
  rescale = FALSE,
  use.label = TRUE,
  interactive = TRUE,
  legend.position = "right"
)